Preprocessing APA PsycInfo

Show code
library(tidyverse)
## get my transliteration table (I tried to guess the PsycInfo ASCII name from the PsycTESTS name)
translit <- readRDS("../sober_rubric/raw_data/psycinfo_psyctests_names.rds")

## get our first scrape (by journal, checking counts for each year in each journal for top tests)
psycinfo_scrape_by_journal <- read_tsv('../sober_rubric/raw_data/merged_table_all.tsv') %>% 
  drop_na(Name) %>% 
  # this tsv can be found in "Scraping-EBSCO-Host\data\merged tables"
#  mutate(Name = toTitleCase(Name)) %>% 
  rename(usage_count = "Hit Count") %>% 
  group_by(Name, Year) %>% 
  summarise(usage_count = sum(usage_count))

## get our second scrape (by test DOI and year)
overview <- readr::read_tsv("../sober_rubric/raw_data/20230617_ebsco_scrape_clean_overview_table_1.tsv")
byyear <- readr::read_tsv("../sober_rubric/raw_data/20230617_ebsco_scrape_table_years_1.tsv")
byyear %>% group_by(DOI) %>% summarise(Hits = sum(Hits, na.rm=T)) %>% pull(Hits) %>% table()
.
    0     1     2     3     4     5     6     7     8     9    10 
   27 13280  4107  2140  1487  1077   864   645   570   464   375 
   11    12    13    14    15    16    17    18    19    20    21 
  375   285   243   237   220   168   180   163   114   141   132 
   22    23    24    25    26    27    28    29    30    31    32 
  102   108   113   108    83    91    72    86    88    68    81 
   33    34    35    36    37    38    39    40    41    42    43 
   77    68    61    45    56    48    42    60    48    37    45 
   44    45    46    47    48    49    50    51    52    53    54 
   38    42    41    34    29    29    33    35    26    31    25 
   55    56    57    58    59    60    61    62    63    64    65 
   21    22    32    19    37    26    23    18    24    16    25 
   66    67    68    69    70    71    72    73    74    75    76 
   19    19    22    19    27    18    18    11    12    12    16 
   77    78    79    80    81    82    83    84    85    86    87 
   11    15    22    16    14    10    13    16    10    13     6 
   88    89    90    91    92    93    94    95    96    97    98 
   10    13    11    10     8    13    14    11    10    17    12 
   99   100   101   102   103   104   105   106   107   108   109 
   11    10    13    12     6     8     8    13     9    13     8 
  110   111   112   113   114   115   116   117   118   119   120 
    6     9     6     7     8     4     5     5    13     8     7 
  121   122   123   124   125   126   127   128   129   130   131 
    7     6    10     9     7     3    13     4     4    11     6 
  132   133   134   135   136   137   138   139   140   141   142 
    4     3     6     5     7     3     6     4     3     8     7 
  143   144   145   146   147   148   149   150   151   152   153 
    9     9     4     8     3     9     4     7     9     6     5 
  154   155   156   157   158   159   160   161   162   163   164 
    5     3     6     5     5     5     4     6     3     3     4 
  165   166   167   168   169   170   171   172   173   174   175 
    3     3     5     1     2     5     3     3     3     3     5 
  176   177   178   179   180   181   182   183   184   185   186 
    2     2     2     4     8     5     4     4     6     5     2 
  187   189   190   191   192   193   194   195   196   197   198 
    1     3     5     6     1     6     4     5     4     4     1 
  199   200   201   202   203   204   205   206   207   208   209 
    1     3     3     5     1     3     3     3     5     2     5 
  210   211   212   213   214   215   216   218   219   220   221 
    3     7     1     3     4     2     3     4     3     3     4 
  222   223   224   225   226   227   228   230   231   233   234 
    1     2     6     4     1     1     3     1     4     2     3 
  235   236   237   238   239   240   241   242   244   245   246 
    2     2     1     1     4     6     2     1     1     4     4 
  247   248   249   251   252   254   255   256   257   258   259 
    1     1     1     2     1     1     2     3     1     2     3 
  260   262   263   264   266   267   268   269   270   271   272 
    3     4     3     1     1     2     1     2     2     1     3 
  274   275   276   278   279   280   282   283   284   285   286 
    3     1     2     4     4     2     2     2     2     2     1 
  287   288   290   291   292   293   294   295   296   297   298 
    2     1     1     2     1     3     3     1     2     2     2 
  299   300   304   305   307   308   309   311   312   313   314 
    3     1     1     1     1     4     1     1     1     1     1 
  315   316   318   319   320   322   324   325   326   327   329 
    3     2     1     3     4     2     1     2     1     1     2 
  330   331   332   333   334   337   338   339   341   342   346 
    1     2     4     1     1     1     1     1     1     1     2 
  347   348   349   353   358   359   361   363   364   367   368 
    1     1     1     1     3     2     2     1     2     1     1 
  371   372   376   377   379   380   384   387   389   392   393 
    2     1     1     2     1     1     2     2     1     1     1 
  394   396   397   398   400   401   405   407   408   411   414 
    1     1     2     1     2     1     2     2     1     1     1 
  415   418   419   423   424   428   429   430   431   436   437 
    1     1     1     1     1     1     1     1     2     1     1 
  438   441   443   445   446   451   452   456   460   462   464 
    2     3     2     1     2     1     1     1     1     1     1 
  466   470   483   485   486   488   491   495   499   500   504 
    2     1     1     1     1     1     1     1     1     3     1 
  512   518   519   520   528   529   532   534   535   537   538 
    1     1     1     1     2     1     1     1     1     1     1 
  539   540   542   544   545   546   550   553   554   556   561 
    1     1     1     2     1     1     1     1     1     1     1 
  562   568   569   570   574   577   584   585   589   595   597 
    1     1     1     1     2     1     1     1     1     1     1 
  598   600   601   603   604   623   626   627   631   632   633 
    1     1     1     1     1     1     1     1     2     1     1 
  639   642   656   658   660   661   662   669   671   675   677 
    1     2     1     1     1     1     1     1     1     1     1 
  678   679   682   686   688   696   698   700   709   710   712 
    1     1     1     1     1     1     1     1     1     1     1 
  714   716   718   720   722   724   725   727   728   730   732 
    1     2     2     1     1     1     1     1     2     1     1 
  733   755   761   762   764   772   773   780   783   794   796 
    1     1     1     1     1     1     1     1     1     1     2 
  800   808   812   813   816   819   825   840   844   845   847 
    1     2     1     1     2     1     1     1     1     1     2 
  848   849   856   862   871   886   891   908   911   915   919 
    1     1     1     1     1     1     1     2     1     1     1 
  928   933   934   935   950   959   969   973   974   981   988 
    1     2     1     2     1     1     2     2     1     1     1 
  992   993  1009  1015  1018  1043  1071  1074  1077  1119  1121 
    1     1     1     1     1     1     1     1     1     1     1 
 1131  1135  1161  1163  1172  1173  1181  1184  1219  1224  1247 
    1     1     1     1     1     1     1     1     1     1     1 
 1251  1253  1255  1267  1296  1300  1323  1340  1378  1380  1392 
    1     1     1     1     1     1     1     1     1     1     1 
 1395  1399  1402  1429  1470  1479  1487  1519  1521  1553  1562 
    1     1     1     1     1     1     1     2     1     1     1 
 1569  1579  1642  1648  1688  1748  1772  1825  1868  1901  1932 
    1     1     1     1     1     1     1     1     1     1     1 
 1937  2052  2065  2074  2102  2121  2130  2132  2149  2200  2254 
    1     1     1     1     1     1     1     1     1     1     1 
 2304  2352  2584  2678  2700  2847  3053  3067  3134  3157  3487 
    1     1     1     1     1     1     1     1     1     1     1 
 3500  3637  3675  3750  3790  4041  4096  4410  4484  4876  4888 
    1     1     1     1     1     1     1     1     1     1     1 
 5147  6257  6313  6365  6408  6494  7023  7095  7238  7504  7597 
    1     1     1     1     1     1     1     1     1     1     1 
 8420  8513  8709  9492 10896 12134 13316 14268 18484 25118 
    1     1     1     1     1     1     1     1     1     1 
Show code
one_hit_wonders <- overview %>% filter(Hits == 1) %>% 
  mutate(Year = first_pub_year) %>% 
  mutate(Hits = coalesce(Hits, 1))
# for some few, the call was repeated by year for some reason
one_hit_wonders %>% select(DOI, first_pub_year) %>% inner_join(byyear, by = "DOI") %>% arrange(DOI)
# A tibble: 13,280 × 4
   DOI                first_pub_year  Year  Hits
   <chr>                       <dbl> <dbl> <dbl>
 1 10.1037/t00002-000           2014  2014     1
 2 10.1037/t00046-000           2009  2009     1
 3 10.1037/t00053-000           2013  2013     1
 4 10.1037/t00077-000           2015  2015     1
 5 10.1037/t00113-000           2012  2012     1
 6 10.1037/t00119-000           2015  2015     1
 7 10.1037/t00127-000           1949  1949     1
 8 10.1037/t00128-000           2014  2014     1
 9 10.1037/t00131-000           1961  1961     1
10 10.1037/t00193-000           2016  2016     1
# ℹ 13,270 more rows
Show code
byyear <- byyear %>% anti_join(one_hit_wonders, by = "DOI")

psycinfo_by_doi <- one_hit_wonders %>% 
  select(DOI, Year, Hits) %>% 
  bind_rows(byyear) %>% 
  left_join(overview %>% rename(total_hits = Hits), by = "DOI")


## don't use tests with names that occur many times
dupe_names <- translit %>% group_by(name_psycinfo) %>% filter(n() > 1) %>% ungroup()
translit <- translit %>% group_by(name_psycinfo) %>% 
  mutate(non_unique_name = n() > 1) %>% 
  filter(row_number() == 1) %>% ungroup()

# merge it all
psycinfo <- psycinfo_by_doi %>% 
  full_join(translit %>% select(DOI, name_psycinfo, NameOC), by = "DOI") %>% 
  full_join(psycinfo_scrape_by_journal, by = c("name_psycinfo" = "Name", "Year")) %>% 
  rename(hits_scrape_1 = usage_count,
         hits_scrape_2 = Hits,
         total_hits_scrape_2 = total_hits) %>% 
  group_by(name_psycinfo) %>% 
  mutate(total_hits_scrape_1 = sum(hits_scrape_1))
psycinfo %>% is.na() %>% colSums()
                DOI                Year       hits_scrape_2 
              96747               39022              135768 
     first_pub_year       last_pub_year total_hits_scrape_2 
             135768              135768              135768 
      name_psycinfo              NameOC       hits_scrape_1 
               3079               99825              218121 
total_hits_scrape_1 
             265989 
Show code
## aggregate it all
psycinfo_overall <- psycinfo %>% 
  group_by(name_psycinfo) %>% 
  summarise(total_hits_scrape_1 = sum(hits_scrape_1, na.rm = T),
         total_hits_scrape_2 = sum(hits_scrape_2, na.rm = T)) %>% 
  left_join(translit %>% select(DOI, name_psycinfo))

## correlate totals
cor.test(psycinfo_overall$total_hits_scrape_1, psycinfo_overall$total_hits_scrape_2)

    Pearson's product-moment correlation

data:  psycinfo_overall$total_hits_scrape_1 and psycinfo_overall$total_hits_scrape_2
t = 249.62, df = 104320, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6076982 0.6152964
sample estimates:
      cor 
0.6115114 
Show code
psycinfo_overall %>% 
  filter(total_hits_scrape_1 > 0, total_hits_scrape_2 > 0) %>% 
  summarise(cor(total_hits_scrape_1, total_hits_scrape_2))
# A tibble: 1 × 1
  `cor(total_hits_scrape_1, total_hits_scrape_2)`
                                            <dbl>
1                                           0.904
Show code
## correlate by year, diffs, proportions
cor.test(psycinfo$hits_scrape_1, psycinfo$hits_scrape_2)

    Pearson's product-moment correlation

data:  psycinfo$hits_scrape_1 and psycinfo$hits_scrape_2
t = 467.52, df = 39014, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.9196473 0.9226533
sample estimates:
     cor 
0.921164 
Show code
psycinfo %>%  mutate(diff = hits_scrape_2 - hits_scrape_1) %>% pull(diff) %>% abs() %>% mean(na.rm=T)
[1] 12.3914
Show code
psycinfo %>%  mutate(prop = hits_scrape_2/ hits_scrape_1) %>% pull(prop) %>%  qplot() + scale_x_log10()
Show code
psycinfo %>%  mutate(diff = hits_scrape_2 - hits_scrape_1) %>% pull(diff) %>%  mean(na.rm=T)
[1] 11.99798
Show code
# psycinfo %>% filter(hits_scrape_1 > hits_scrape_2) %>% select(DOI, Year, name_psycinfo, NameOC, hits_scrape_1, hits_scrape_2) %>% mutate(diff = hits_scrape_2 - hits_scrape_1) %>% arrange(diff) %>% View()

psycinfo %>% filter(hits_scrape_1 < hits_scrape_2) %>% nrow()
[1] 27545
Show code
psycinfo %>%  mutate(diff = hits_scrape_2 - hits_scrape_1) %>% pull(diff) %>% table() %>% sort()
.
-165 -143  -99  -98  -84  -81  -73  -50  -43  -41  -39  -35  -31  -27 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 -23  -21  -19  -15  -13  140  143  154  179  182  186  190  195  206 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 216  226  228  233  239  241  243  246  248  250  253  257  258  260 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 262  263  265  268  269  274  278  281  284  285  287  290  293  294 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 298  301  302  307  311  312  313  316  319  325  326  327  328  332 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 334  335  337  340  344  347  350  351  355  358  359  365  376  379 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 381  383  394  396  398  400  404  406  410  413  414  416  417  418 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 421  428  429  430  432  433  434  437  439  441  443  446  449  460 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 462  466  474  490  493  495  496  502  510  511  512  516  526  531 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 539  553  562  563  567  571  577  586  590  602  604  613  633  639 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 640  644  655  659  661  683  691  700  701  704  714  736  765  771 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 775  791  804  806  828  854  858  865  879  919  950  955  965  966 
   1    1    1    1    1    1    1    1    1    1    1    1    1    1 
 976 1004 1005 1265 1335 1591  -96  -17  -16  -12  105  119  135  136 
   1    1    1    1    1    1    2    2    2    2    2    2    2    2 
 141  153  157  159  160  162  164  165  167  169  172  173  174  180 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2 
 183  191  197  198  200  207  211  217  218  225  232  235  236  238 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2 
 244  256  261  267  270  272  273  282  288  295  304  305  306  317 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2 
 318  322  339  342  346  349  352  369  373  375  380  385  392  407 
   2    2    2    2    2    2    2    2    2    2    2    2    2    2 
 408  431  436  438  440  450  456  548  680  -11   -9   99  118  138 
   2    2    2    2    2    2    2    2    2    3    3    3    3    3 
 146  158  171  177  178  185  189  192  196  199  202  204  205  208 
   3    3    3    3    3    3    3    3    3    3    3    3    3    3 
 215  219  220  222  223  234  247  254  264  275  279  286  297  303 
   3    3    3    3    3    3    3    3    3    3    3    3    3    3 
 309  329  336  356  367  374  382  537  -14  -10  132  134  142  144 
   3    3    3    3    3    3    3    3    4    4    4    4    4    4 
 145  148  150  163  170  176  187  188  193  194  224  255  366   -8 
   4    4    4    4    4    4    4    4    4    4    4    4    4    5 
  97  126  129  139  149  152  155  156  161  166  168  175  181  184 
   5    5    5    5    5    5    5    5    5    5    5    5    5    5 
 209  229  231  107  109  111  116  125  130  151  103  104  113  117 
   5    5    5    6    6    6    6    6    6    6    7    7    7    7 
 120  121  127  133  137  147   89  115  123   -7  110  112  114  124 
   7    7    7    7    7    7    8    8    8    9    9    9    9    9 
 131   86   90   92  100  101  108  122  128   74   87   88   95   96 
   9   10   10   10   10   10   10   10   10   11   11   11   11   11 
 102   91   93   94  106   77   85   98   63   82   72   80   81   83 
  11   12   12   12   12   13   14   14   15   15   16   16   16   16 
  84   76   70   73   79   -6   65   64   69   75   78   60   68   71 
  16   17   18   18   19   20   20   21   21   22   23   24   24   24 
  66   67   62   -5   59   61   55   57   58   50   53   56   46   52 
  26   28   29   30   31   32   33   33   34   35   35   39   43   45 
  54   43   40   48   51   49   45   42   44   41   47   39   38   -4 
  45   46   47   47   48   50   52   55   58   59   61   62   69   70 
  37   34   36   35   33   32   30   29   31   27   28   26   25   23 
  76   77   77   85   96  101  102  107  108  112  123  142  157  163 
  -3   24   22   21   20   19   18   17   16   15   14   13   12   11 
 165  166  183  200  225  248  268  285  311  357  383  431  544  591 
  -2   10    9    8    7    6    5    4    3    2   -1    1    0 
 615  688  765  933 1066 1228 1589 2033 2638 3487 3759 4918 6757 
Show code
# psycinfo %>% filter(hits_scrape_1 < hits_scrape_2) %>% select(DOI, Year, name_psycinfo, NameOC, hits_scrape_1, hits_scrape_2) %>% mutate(diff = hits_scrape_2 - hits_scrape_1) %>% arrange(diff) %>% View()

Top Tests in each

Only in PsycInfo Scrape 1

Show code
psycinfo_overall %>% 
  ungroup() %>% 
  filter(total_hits_scrape_1 > 0,
         total_hits_scrape_2 == 0) %>% 
  summarise(n(), sum(total_hits_scrape_1), sum(total_hits_scrape_1)/n())
# A tibble: 1 × 3
  `n()` `sum(total_hits_scrape_1)` `sum(total_hits_scrape_1)/n()`
  <int>                      <dbl>                          <dbl>
1 35864                     254316                           7.09
Show code
options(cols.min.print = 2, cols.print = 2)
Show code
psycinfo_overall %>% 
  ungroup() %>% 
  # filter(is.na(DOI)) %>%
  filter(total_hits_scrape_2 == 0, total_hits_scrape_1 >= 1) %>% 
  arrange(desc(total_hits_scrape_1)) %>% 
  select(name_psycinfo, total_hits_scrape_1) %>% 
  arrange(desc(total_hits_scrape_1)) %>% 
  DT::datatable()

Only in PsycTests Scrape 2

Show code
psycinfo_overall %>% 
  ungroup() %>% 
  filter(total_hits_scrape_1 == 0,
         total_hits_scrape_2 > 0) %>% 
  summarise(n(), sum(total_hits_scrape_2), sum(total_hits_scrape_2)/n())
# A tibble: 1 × 3
  `n()` `sum(total_hits_scrape_2)` `sum(total_hits_scrape_2)/n()`
  <int>                      <dbl>                          <dbl>
1 22052                      57411                           2.60
Show code
psycinfo_overall %>% 
  ungroup() %>% 
  filter(total_hits_scrape_1 == 0, total_hits_scrape_2 >= 1) %>% 
  # filter(!is.na(DOI), is.na(total_hits_scrape_1) | total_hits_scrape_1 == 0) %>% 
  drop_na(name_psycinfo, total_hits_scrape_2) %>% 
  arrange(desc(total_hits_scrape_2)) %>% 
  select( name_psycinfo, total_hits_scrape_2) %>% 
  DT::datatable()

Hits only in scrape 1, even though we have a match for the name

Show code
psycinfo_overall %>% 
  ungroup() %>% 
  filter(!is.na(DOI),
         total_hits_scrape_1 > 0,
         total_hits_scrape_2 == 0) %>% 
  summarise(n(), sum(total_hits_scrape_1), sum(total_hits_scrape_1)/n())
# A tibble: 1 × 3
  `n()` `sum(total_hits_scrape_1)` `sum(total_hits_scrape_1)/n()`
  <int>                      <dbl>                          <dbl>
1  1453                      22239                           15.3

Hits only in scrape 1 without a clear match for the name

Show code
psycinfo_overall %>% 
  ungroup() %>% 
  filter(is.na(DOI),
         total_hits_scrape_1 > 0,
         total_hits_scrape_2 == 0) %>% 
  summarise(n(), sum(total_hits_scrape_1), sum(total_hits_scrape_1)/n())
# A tibble: 1 × 3
  `n()` `sum(total_hits_scrape_1)` `sum(total_hits_scrape_1)/n()`
  <int>                      <dbl>                          <dbl>
1 34411                     232077                           6.74

Merge Scrape 1 and 2

Show code
psycinfo_scrape_1_without_hits_in_2 <- psycinfo_overall %>% 
    ungroup() %>% 
    filter(total_hits_scrape_1 > 0, is.na(total_hits_scrape_2) | total_hits_scrape_2 == 0) %>% 
    select(DOI, name_psycinfo) %>% 
    distinct(name_psycinfo, .keep_all = TRUE) %>% 
    left_join(psycinfo_scrape_by_journal %>% 
     rename(name_psycinfo = Name, Hits = usage_count), by = "name_psycinfo", multiple = "all") %>% 
    mutate(DOI = coalesce(DOI, name_psycinfo)) %>% 
    group_by(DOI) %>% 
    mutate(first_pub_year = min(Year, na.rm = T),
           last_pub_year = max(Year, na.rm = T),
           total_hits = sum(Hits, na.rm = T)) %>% 
  ungroup()

psycinfo_scrape_1_without_hits_in_2 %>% 
  summarise(n_distinct(DOI), sum(Hits), sum(Hits)/n_distinct(DOI))
# A tibble: 1 × 3
  `n_distinct(DOI)` `sum(Hits)` `sum(Hits)/n_distinct(DOI)`
              <int>       <dbl>                       <dbl>
1             35864      254316                        7.09
Show code
psycinfo_by_doi_with_hits <- psycinfo_by_doi %>%
  drop_na(Hits, Year) %>% 
  anti_join(psycinfo_overall %>% filter(total_hits_scrape_2 == 0) %>% select(DOI), by = "DOI") %>% 
  left_join(translit %>% select(DOI, name_psycinfo), by = "DOI")
sum(is.na(psycinfo_by_doi_with_hits$name_psycinfo))
[1] 3078
Show code
sum(!is.na(psycinfo_by_doi_with_hits$name_psycinfo))
[1] 215037
Show code
psycinfo_by_doi_with_hits %>% 
  summarise(n_distinct(DOI), sum(Hits, na.rm = T), sum(Hits, na.rm = T)/n_distinct(DOI))
# A tibble: 1 × 3
  `n_distinct(DOI)` `sum(Hits, na.rm = T)` sum(Hits, na.rm = T)/n_di…¹
              <int>                  <dbl>                       <dbl>
1             31118                 876009                        28.2
# ℹ abbreviated name: ¹​`sum(Hits, na.rm = T)/n_distinct(DOI)`
Show code
psycinfo_merged <- bind_rows(
  scrape_2 = psycinfo_by_doi_with_hits, 
  scrape_1 = psycinfo_scrape_1_without_hits_in_2, .id = "source")

psycinfo_merged %>% 
  summarise(n_distinct(DOI), sum(Hits, na.rm = T), sum(Hits, na.rm = T)/n_distinct(DOI))
# A tibble: 1 × 3
  `n_distinct(DOI)` `sum(Hits, na.rm = T)` sum(Hits, na.rm = T)/n_di…¹
              <int>                  <dbl>                       <dbl>
1             66982                1130325                        16.9
# ℹ abbreviated name: ¹​`sum(Hits, na.rm = T)/n_distinct(DOI)`
Show code
saveRDS(psycinfo_merged, "../sober_rubric/raw_data/psycinfo_merged_scrape_1_and_2.rds")

Joint top list

Show code
psycinfo_merged %>% 
  group_by(DOI, name_psycinfo, source) %>%
  summarise(total_hits = sum(Hits, na.rm  = T)) %>% 
  arrange(desc(total_hits)) %>% 
  ungroup() %>% 
  select( source, name_psycinfo, total_hits) %>% 
  DT::datatable()